
# load ITANES data (Mancini and Roncarolo 2018, Tab. 3.2)
itanes_data <- read.csv("./itanes_data.csv")

# convert table to absolute values keeping only parties and newspapers 
itanes <- itanes_data[,2:12]/itanes_data[,12]
itanes <- itanes*itanes_data[,13]
itanes_data[,2:12] <- itanes
itanes_data <- itanes_data[1:11,c(1:3, 5:8)]
rownames(itanes_data) <- itanes_data[,1]
itanes_data <- itanes_data[,-1]
itanes_data <- round(itanes_data)
sum(itanes_data) # sample: 3597
rm(itanes)


# Principal Component Analysis ####

library(FactoMineR)
library(factoextra)

# ITANES ====

itanes <- itanes_data/rowSums(itanes_data)

itanes <- itanes[order(row.names(itanes)), ]

itanes_pca <- PCA(itanes, graph = FALSE)

fviz_pca_var(itanes_pca, title = "ITANES")

fviz_pca_biplot(itanes_pca, title = "ITANES", repel = TRUE)

# MP-MPAS =====

mpmpas <- readRDS("./data/mediasources_mpas_insularity.rds")

# select parties included in Itanes survey
mpmpas <- mpmpas[, c(1, 6, 9, 8, 4, 7, 3)]

# normalize data (0-1)
mpmpas[,2:7] <- mpmpas[,2:7]/rowSums(mpmpas[,2:7])

mpmpas[mpmpas$domain == "corriere.it", "newspaper"] <- "corriere.it"
mpmpas[mpmpas$domain == "repubblica.it", "newspaper"] <- "repubblica.it"
mpmpas[mpmpas$domain == "lastampa.it", "newspaper"] <- "lastampa.it"
mpmpas[mpmpas$domain == "ilsole24ore.com", "newspaper"] <- "ilsole24ore.com"
mpmpas[mpmpas$domain == "ilgiornale.it", "newspaper"] <- "ilgiornale.it"
mpmpas[mpmpas$domain == "liberoquotidiano.it", "newspaper"] <- "liberoquotidiano.it"
mpmpas[mpmpas$domain == "ilfattoquotidiano.it", "newspaper"] <- "ilfattoquotidiano.it"
mpmpas[mpmpas$domain == "ilmessaggero.it", "newspaper"] <- "ilmessaggero.it"
mpmpas[mpmpas$domain == "avvenire.it", "newspaper"] <- "avvenire.it"
mpmpas[mpmpas$domain == "ilrestodelcarlino.it", "newspaper"] <- "ilrestodelcarlino.it"
mpmpas[mpmpas$domain == "leggo.it", "newspaper"] <- "leggo.it"

rownames(mpmpas) <- mpmpas$domain
mpmpas$domain <- NULL 

mpmpas_pca <- PCA(mpmpas, quali.sup = 7, graph = FALSE)

fviz_pca_var(mpmpas_pca, title = "MP-MPAS")

name <- list(name = c("corriere.it", "repubblica.it", "lastampa.it", 
                      "ilsole24ore.com", "ilgiornale.it", "liberoquotidiano.it",
                      "ilfattoquotidiano.it", "ilmessaggero.it", "avvenire.it",
                      "ilrestodelcarlino.it","leggo.it"))

fviz_pca_biplot(mpmpas_pca, select.ind = name, title = "MP-MPAS",
                repel = TRUE)

# correlation between ITANES and MP-MPAS first two PCA components ====

mpmpas_coord <- mpmpas_pca$var$coord
itanes_coord <- itanes_pca$var$coord

mpmpas_coord <- mpmpas_coord[order(rownames(mpmpas_coord)),] 
itanes_coord <- itanes_coord[order(rownames(itanes_coord)),] 

cor1 <- cor.test(mpmpas_coord[,1], itanes_coord[,1])
cor2 <- cor.test(mpmpas_coord[,2], itanes_coord[,2])

cor1; round(cor1$estimate, 2)
cor2; round(cor2$estimate, 2)

####################
# Chi-squared test
####################

# expected values
voters_proportion <- colSums(itanes_data)/sum(colSums(itanes_data))

# chi-squared test of goodness-of-fit
Avvenire <- chisq.test(itanes_data["Avvenire",], p = voters_proportion)
Corriere_della_Sera <- chisq.test(itanes_data["Corriere della Sera",], p = voters_proportion)
Il_Fatto_Quotidiano <- chisq.test(itanes_data["Il Fatto Quotidiano",], p = voters_proportion)
Il_Giornale <- chisq.test(itanes_data["Il Giornale",], p = voters_proportion)
Il_Messaggero <- chisq.test(itanes_data["Il Messaggero",], p = voters_proportion)
Il_Sole_24ore <- chisq.test(itanes_data["Il Sole 24 Ore",], p = voters_proportion)
Leggo <- chisq.test(itanes_data["Leggo",], p = voters_proportion)
Libero <- chisq.test(itanes_data["Libero",], p = voters_proportion)
QN_Resto_del_Carlino <- chisq.test(itanes_data["QN Il Resto del Carlino",], p = voters_proportion)
Repubblica <- chisq.test(itanes_data["La Repubblica",], p = voters_proportion)
Stampa <- chisq.test(itanes_data["La Stampa",], p = voters_proportion)

# standardized chi squared residuals critical values (Bonferroni correction) 
p.value <- 0.001/6 
z.critical_001 <- qnorm(1 - (p.value/2))
p.value <- 0.01/6
z.critical_01 <- qnorm(1 - (p.value/2))
p.value <- 0.05/6 
z.critical_05 <- qnorm(1 - (p.value/2))

standardized_residuals <- data.frame("Avvenire" = Avvenire$stdres, 
                                     "Corriere_della_Sera" = Corriere_della_Sera$stdres,
                                     "Il_Fatto_Quotidiano" = Il_Fatto_Quotidiano$stdres,
                                     "Il_Giornale" = Il_Giornale$stdres,
                                     "Il_Messaggero" = Il_Messaggero$stdres,
                                     "Il_Sole_24ore" = Il_Sole_24ore$stdres,
                                     "Leggo" = Leggo$stdres,
                                     "Libero" = Libero$stdres,
                                     "QN_Resto_del_Carlino" = QN_Resto_del_Carlino$stdres,
                                     "Repubblica" = Repubblica$stdres,
                                     "Stampa" = Stampa$stdres)


write.csv2(round(standardized_residuals, 2), file = "itanes_standardized_residuals.csv")

